Chapter 8 HMSC analysis

8.1 Load data

load("data/data.Rdata")
load("hmsc/fit_model1_250_10.Rdata")

8.2 Variance partitioning

# Compute variance partitioning
varpart=computeVariancePartitioning(m)

varpart$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(variable=factor(variable, levels=rev(c("origin","sex","logseqdepth","Random: location")))) %>%
   group_by(variable) %>%
   summarise(mean=mean(value)*100,sd=sd(value)*100) %>%
   tt()
tinytable_dvsiaqvo244qwlms4niw
variable mean sd
Random: location 37.900015 25.317903
logseqdepth 56.110626 25.796874
sex 4.937460 5.612719
origin 1.051899 1.282563
# Basal tree
varpart_tree <- genome_tree

#Varpart table
varpart_table <- varpart$vals %>%
   as.data.frame() %>%
   rownames_to_column(var="variable") %>%
   pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
   mutate(genome=factor(genome, levels=rev(varpart_tree$tip.label))) %>%
   mutate(variable=factor(variable, levels=rev(c("origin","sex","logseqdepth","Random: location"))))

#Phylums
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
    mutate(phylum=str_remove_all(phylum, "p__"))%>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% varpart_tree$tip.label) %>%
    arrange(match(genome, varpart_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)

colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
    mutate(phylum=str_remove_all(phylum, "p__"))%>%
  right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% varpart_tree$tip.label) %>%
    arrange(match(genome, varpart_tree$tip.label)) %>%
     select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Basal ggtree
varpart_tree <- varpart_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
# Add phylum colors next to the tree tips
varpart_tree <- gheatmap(varpart_tree, phylum_colors, offset=-0.2, width=0.1, colnames=FALSE) +
   scale_fill_manual(values=colors_alphabetic)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
varpart_tree <- varpart_tree + new_scale_fill()

# Add variance stacked barplot
vertical_tree <-  varpart_tree +
       scale_fill_manual(values=c("#506a96","#cccccc","#be3e2b","#f6de6c"))+
        geom_fruit(
             data=varpart_table,
             geom=geom_bar,
             mapping = aes(x=value, y=genome, fill=variable, group=variable),
             pwidth = 2,
             offset = 0.05,
             width= 1,
             orientation="y",
             stat="identity")+
      labs(fill="Variable")

vertical_tree

8.3 Posterior estimates

# Select desired support threshold
support=0.9
negsupport=1-support

# Basal tree
postestimates_tree <- genome_tree

# Posterior estimate table
post_beta <- getPostEstimate(hM=m, parName="Beta")$support %>%
    as.data.frame() %>%
    mutate(variable=m$covNames) %>%
    pivot_longer(!variable, names_to = "genome", values_to = "value") %>%
    mutate(genome=factor(genome, levels=rev(postestimates_tree$tip.label))) %>%
    mutate(value = case_when(
          value >= support ~ "Positive",
          value <= negsupport ~ "Negative",
          TRUE ~ "Neutral")) %>%
    mutate(value=factor(value, levels=c("Positive","Neutral","Negative"))) %>%
    pivot_wider(names_from = variable, values_from = value) %>%
    #select(genome,sp_vulgaris,area_semi,area_urban,sp_vulgarisxarea_semi,sp_vulgarisxarea_urban,season_spring,season_winter,sp_vulgarisxseason_spring,sp_vulgarisxseason_winter) %>%
    column_to_rownames(var="genome")

#Phylums
phylum_colors <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
    mutate(phylum=str_remove_all(phylum, "p__")) %>%
    right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% postestimates_tree$tip.label) %>%
    arrange(match(genome, postestimates_tree$tip.label)) %>%
    mutate(phylum = factor(phylum, levels = unique(phylum))) %>%
    column_to_rownames(var = "genome") %>%
    select(phylum)


colors_alphabetic <- read_tsv("https://raw.githubusercontent.com/earthhologenome/EHI_taxonomy_colour/main/ehi_phylum_colors.tsv") %>%
    mutate(phylum=str_remove_all(phylum, "p__")) %>%
    right_join(genome_metadata, by=join_by(phylum == phylum)) %>%
    filter(genome %in% postestimates_tree$tip.label) %>%
    arrange(match(genome, postestimates_tree$tip.label)) %>%
     select(phylum, colors) %>%
    unique() %>%
    arrange(phylum) %>%
    select(colors) %>%
    pull()

# Basal ggtree
postestimates_tree <- postestimates_tree %>%
        force.ultrametric(.,method="extend") %>%
        ggtree(., size = 0.3)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#Add phylum colors next to the tree tips
postestimates_tree <- gheatmap(postestimates_tree, phylum_colors, offset=-0.2, width=0.1, colnames=FALSE) +
      scale_fill_manual(values=colors_alphabetic)+
      labs(fill="Phylum")

#Reset fill scale to use a different colour profile in the heatmap
postestimates_tree <- postestimates_tree + new_scale_fill()

# Add posterior significant heatmap

postestimates_tree <- gheatmap(postestimates_tree, post_beta, offset=0, width=0.5, colnames=TRUE, colnames_position="top",colnames_angle=90, colnames_offset_y=1, hjust=0) +
        scale_fill_manual(values=c("#be3e2b","#f4f4f4","#b2b530"))+
        labs(fill="Trend")

postestimates_tree +
        vexpand(.25, 1) # expand top 

8.4 Correlations

#Compute the residual correlation matrix
OmegaCor = computeAssociations(m)

# Refernece tree (for sorting genomes)
genome_tree_subset <- genome_tree %>%
        keep.tip(., tip=m$spNames) 


#Co-occurrence matrix at the animal level
supportLevel = 0.95
toPlot = ((OmegaCor[[1]]$support>supportLevel)
          + (OmegaCor[[1]]$support<(1-supportLevel))>0)*OmegaCor[[1]]$mean

matrix <- toPlot %>% 
      as.data.frame() %>%
      rownames_to_column(var="genome1") %>%
      pivot_longer(!genome1, names_to = "genome2", values_to = "cor") %>%
      mutate(genome1= factor(genome1, levels=genome_tree_subset$tip.label)) %>%
      mutate(genome2= factor(genome2, levels=genome_tree_subset$tip.label)) %>%
      ggplot(aes(x = genome1, y = genome2, fill = cor)) +
            geom_tile() + 
            scale_fill_gradient2(low = "#be3e2b",
                       mid = "#f4f4f4",
                       high = "#b2b530")+
            theme_void()

htree <- genome_tree_subset %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(.)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
vtree <- genome_tree_subset %>%
  force.ultrametric(.,method="extend") %>%
  ggtree(.)
***************************************************************
*                          Note:                              *
*    force.ultrametric does not include a formal method to    *
*    ultrametricize a tree & should only be used to coerce    *
*   a phylogeny that fails is.ultrametric due to rounding --  *
*    not as a substitute for formal rate-smoothing methods.   *
***************************************************************
#create composite figure
grid.arrange(grobs = list(matrix,vtree),
             layout_matrix = rbind(c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1),
                                   c(2,1,1,1,1,1,1,1,1,1,1,1)))

8.5 Predict responses

# Select modelchain of interest
load("hmsc/fit_model1_250_10.Rdata")

gradient = c("domestic","feral")
gradientlength = length(gradient)

#Treatment-specific gradient predictions
pred <- constructGradient(m, 
                      focalVariable = "origin", 
                      non.focalVariables = list(logseqdepth=list(1),location=list(1))) %>%
            predict(m, Gradient = ., expected = TRUE) %>%
            do.call(rbind,.) %>%
            as.data.frame() %>%
            mutate(origin=rep(gradient,1000)) %>%
            pivot_longer(!origin,names_to = "genome", values_to = "value")
# weights:  9 (4 variable)
initial  value 101.072331 
final  value 91.392443 
converged

8.5.0.1 Element level

elements_table <- genome_gifts %>%
    to.elements(., GIFT_db) %>%
    as.data.frame()

community_elements <- pred %>%
  group_by(origin, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "origin") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(elements_table,.,GIFT_db) %>% 
    as.data.frame() %>%
    rownames_to_column(var="origin")
   })

calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

element_predictions <- map_dfc(community_elements, function(mat) {
      mat %>%
        column_to_rownames(var = "origin") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_elements[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)
# Positively associated
element_predictions %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  filter(positive_support>=0.9) %>%
  tt()
tinytable_3739gnuibiujtzgufjt2
trait mean p1 p9 positive_support negative_support
D0205 0.012603387 2.593807e-03 0.022939556 0.954 0.046
D0208 0.010227729 1.642595e-03 0.018507180 0.945 0.055
D0906 0.003565801 1.749126e-04 0.007907557 0.928 0.072
D0504 0.004396738 2.650007e-04 0.009408255 0.921 0.079
B0103 0.009096237 8.776903e-04 0.017087166 0.916 0.084
D0507 0.003697711 6.655942e-05 0.007228665 0.903 0.097
element_predictions %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  filter(negative_support>=0.9) %>%
  tt()
tinytable_v96hdbgso2xv06yewxwl
trait mean p1 p9 positive_support negative_support
D0801 -0.001672597 -0.002483633 -9.833235e-05 0.014 0.986
D0802 -0.001672597 -0.002483633 -9.833235e-05 0.014 0.986
B0310 -0.013442775 -0.024190545 -3.605897e-03 0.025 0.975
D0517 -0.005167475 -0.008180822 -1.341754e-03 0.025 0.975
B0302 -0.005193974 -0.010525780 -6.418008e-04 0.032 0.968
B0709 -0.002110743 -0.003632081 -5.607182e-04 0.034 0.966
D0601 -0.009391013 -0.017719312 -1.923462e-03 0.034 0.966
D0611 -0.004341066 -0.009391699 -2.372652e-04 0.038 0.962
D0903 -0.004341066 -0.009391699 -2.372652e-04 0.038 0.962
B0219 -0.004355929 -0.009480552 -2.632315e-04 0.039 0.961
B0804 -0.017049337 -0.030998939 -3.672770e-03 0.044 0.956
D0807 -0.004530684 -0.008778695 -6.943753e-04 0.044 0.956
D0603 -0.002011208 -0.004002246 -2.931153e-04 0.047 0.953
D0817 -0.005309855 -0.010758020 -5.734667e-04 0.051 0.949
B0303 -0.012060364 -0.022094010 -2.735578e-03 0.052 0.948
D0610 -0.003193577 -0.005075296 -8.978410e-04 0.054 0.946
D0606 -0.006029744 -0.011505444 -8.948160e-04 0.061 0.939
B0603 -0.016268750 -0.032036510 -2.680562e-03 0.063 0.937
D0908 -0.016042713 -0.027618565 -4.081974e-03 0.063 0.937
B0214 -0.021066765 -0.037854042 -4.079732e-03 0.076 0.924
B0401 -0.011800076 -0.023901689 -9.968034e-04 0.078 0.922
D0816 -0.006097811 -0.012262606 -5.143077e-04 0.079 0.921
B0601 -0.009436323 -0.018855245 -6.283849e-04 0.083 0.917
D0508 -0.003593430 -0.007732534 -2.235608e-04 0.084 0.916
D0612 -0.002002036 -0.003174204 -7.712861e-05 0.088 0.912
B0204 -0.016246634 -0.032996834 -1.386663e-03 0.090 0.910
B0708 -0.025619028 -0.048674986 -2.910586e-04 0.097 0.903
B0309 -0.007653490 -0.015286326 -2.801407e-05 0.099 0.901
positive <- element_predictions %>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

negative <- element_predictions %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

bind_rows(positive,negative) %>%
  left_join(GIFT_db,by=join_by(trait==Code_element)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive$trait),rev(negative$trait)))) %>%
  ggplot(aes(x=mean, y=fct_rev(trait), xmin=p1, xmax=p9, color=Function)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.04,0.04)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = c("#debc14","#440526","#dc7c17","#172742","#debc14","#440526","#dc7c17","#172742","#357379","#6c7e2c","#d8dc69","#774d35","#db717d")) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait")

8.5.0.2 Function level

functions_table <- elements_table %>%
    to.functions(., GIFT_db) %>%
    as.data.frame()

community_functions <- pred %>%
  group_by(origin, genome) %>%
  mutate(row_id = row_number()) %>%
  pivot_wider(names_from = genome, values_from = value) %>%
  ungroup() %>%
  group_split(row_id) %>%
  as.list() %>%
  lapply(., FUN = function(x){x %>%
    select(-row_id) %>%
    column_to_rownames(var = "origin") %>%
    as.data.frame() %>%
    exp() %>%
    t() %>%
    tss() %>%
    to.community(functions_table,.,GIFT_db) %>% 
    as.data.frame() %>%
    rownames_to_column(var="origin")
   })
#max-min option
calculate_slope <- function(x) {
  lm_fit <- lm(unlist(x) ~ seq_along(unlist(x)))
  coef(lm_fit)[2]
}

function_predictions <- map_dfc(community_functions, function(mat) {
      mat %>%
        column_to_rownames(var = "origin") %>%
        t() %>%
        as.data.frame() %>%
        rowwise() %>%
        mutate(slope = calculate_slope(c_across(everything()))) %>%
        select(slope) }) %>%
      t() %>%
      as.data.frame() %>%
      set_names(colnames(community_functions[[1]])[-1]) %>%
      rownames_to_column(var="iteration") %>%
      pivot_longer(!iteration, names_to="trait",values_to="value") %>%
      group_by(trait) %>%
      summarise(mean=mean(value),
        p1 = quantile(value, probs = 0.1),
        p9 = quantile(value, probs = 0.9),
        positive_support = sum(value > 0)/1000,
        negative_support = sum(value < 0)/1000) %>%
      arrange(-positive_support)

# Positively associated
function_predictions %>%
  filter(mean >0) %>%
  arrange(-positive_support) %>%
  tt()
tinytable_wfpyv0uhuli9vawschcx
trait mean p1 p9 positive_support negative_support
D02 0.0084245611 -0.002708007 0.020025834 0.836 0.164
B08 0.0074961511 -0.003350089 0.018284003 0.791 0.209
B01 0.0011568863 -0.005731988 0.008379737 0.625 0.375
S01 0.0007429401 -0.011690216 0.013752273 0.570 0.430
# Negatively associated
function_predictions %>%
  filter(mean <0) %>%
  arrange(-negative_support) %>%
  tt()
tinytable_4wv28v0nppjw1ewc0aez
trait mean p1 p9 positive_support negative_support
B03 -1.112411e-02 -0.0187246652 -0.0039909543 0.032 0.968
D08 -1.214619e-03 -0.0023722116 -0.0002337339 0.034 0.966
D06 -3.261848e-03 -0.0071802725 0.0001432296 0.108 0.892
B04 -8.359836e-03 -0.0185309424 0.0011794391 0.135 0.865
D07 -1.206714e-02 -0.0304543194 0.0040195356 0.182 0.818
B06 -6.715638e-03 -0.0162987317 0.0033156299 0.188 0.812
D05 -2.065349e-03 -0.0078384328 0.0033380053 0.209 0.791
D03 -4.331512e-03 -0.0134588083 0.0035668705 0.224 0.776
S03 -1.020199e-02 -0.0337194527 0.0150239871 0.247 0.753
B02 -4.000958e-03 -0.0128546917 0.0041689091 0.263 0.737
D09 -2.283353e-03 -0.0088140914 0.0048311247 0.312 0.688
S02 -4.751123e-03 -0.0151394921 0.0031706467 0.325 0.675
B07 -4.054452e-03 -0.0158206337 0.0080154534 0.331 0.669
B09 -2.002830e-05 -0.0005311183 0.0004142376 0.361 0.639
D01 -4.373472e-04 -0.0049969959 0.0041040520 0.410 0.590
B10 -1.442446e-05 -0.0003132959 0.0002538415 0.473 0.527
positive <- function_predictions %>%
  filter(mean >0) %>%
  arrange(mean) %>%
  filter(positive_support>=0.9) %>%
  select(-negative_support) %>%
  rename(support=positive_support)

negative <- function_predictions %>%
  filter(mean <0) %>%
  arrange(mean) %>%
  filter(negative_support>=0.9) %>%
  select(-positive_support) %>%
  rename(support=negative_support)

bind_rows(positive,negative) %>%
  left_join(GIFT_db,by=join_by(trait==Code_function)) %>%
  mutate(trait=factor(trait,levels=c(rev(positive$trait),rev(negative$trait)))) %>%
  ggplot(aes(x=mean, y=fct_rev(trait), xmin=p1, xmax=p9, color=Function)) +
      geom_point() +
      geom_errorbar() +
      xlim(c(-0.02,0.02)) +
      geom_vline(xintercept=0) +
      scale_color_manual(values = c("#debc14","#440526","#dc7c17","#172742","#debc14","#440526","#dc7c17","#172742","#357379","#6c7e2c","#d8dc69","#774d35","#db717d")) +
      theme_minimal() +
      labs(x="Regression coefficient",y="Functional trait")